### Descriptive statistics
# Start from an empty global environment: remove every object, including
# hidden ones whose names start with a dot (all.names = TRUE).
rm(list = ls(all.names = TRUE))
# Trigger a garbage collection after clearing the workspace.
gc() 

# Load the input data set; the path is relative to the working directory.
# NOTE(review): this file contains no library() calls, yet later sections
# call %>%, group_by(), summarise(), stargazer(), ggplot(),
# scale_colour_colorblind() and setDT() -- presumably those packages are
# attached by whatever script runs this one; confirm.
data <- read.csv("intermediary_outputs/base_with_connection_data.csv")

## Descriptive for whole sample and by race -------
# Build a table of descriptive statistics for the whole sample and by race.
#
# Reads the global object `data` (not passed as an argument) and uses its
# columns flag_select_sample, race_factor and race, plus every column named
# in `var_list`.
#
# Args:
#   var_list: character vector (or list) of column names of `data`.
#
# Returns:
#   A character matrix with 8 columns labelled "All", "", "White", "",
#   "Brown", "", "Black", "". The first row holds observation counts; each
#   following row holds, for one variable, the mean rounded to 3 decimals
#   followed by the standard deviation rounded to 3 decimals in parentheses,
#   for each of the four groups. With an empty `var_list` only the
#   observation row is returned.
#
# Raises:
#   An error if any name in `var_list` is not a column of `data`.
descrip_tab_whole_race <- function(var_list) {
  col_labels <- c("All", "", "White", "", "Brown", "", "Black", "")

  unknown_vars <- setdiff(unlist(var_list), names(data))
  if (length(unknown_vars) > 0) {
    stop("Columns not found in data: ",
         paste(unknown_vars, collapse = ", "), call. = FALSE)
  }

  # Row indices of each group; none of this depends on the variable, so it
  # is computed once instead of once per variable.
  in_sample <- data$flag_select_sample == 0
  group_rows <- list(
    which(in_sample),
    which(data$race_factor == "White" & in_sample),
    which(data$race_factor == "Brown" & in_sample),
    which(data$race_factor == "Black" & in_sample)
  )

  # Observation counts are restricted to race < 4.
  # NOTE(review): the "All" statistics below use every selected row, also
  # those with race >= 4 or a missing race -- confirm this is intended.
  counted <- which(in_sample & data$race < 4)
  # table() orders the counts by ascending race code.
  # NOTE(review): positions 1, 3, 2 are written to the White, Brown and Black
  # columns, which assumes exactly three codes below 4 sorted as White,
  # Black, Brown -- verify against the codebook.
  race_counts <- as.vector(table(data$race[counted]))
  obs_row <- rep("", length(col_labels))
  obs_row[1] <- as.character(length(counted))
  obs_row[3] <- as.character(race_counts[1])
  obs_row[5] <- as.character(race_counts[3])
  obs_row[7] <- as.character(race_counts[2])

  # Mean and "(sd)" of one group; c() coerces the numeric mean to character.
  mean_and_sd <- function(values) {
    c(round(mean(values, na.rm = TRUE), 3),
      paste0("(", round(sd(values, na.rm = TRUE), 3), ")"))
  }

  # One 1 x 8 row per variable, built without growing a matrix in a loop.
  stat_rows <- lapply(var_list, function(var_of_int) {
    column <- data[[var_of_int]]
    cells <- unlist(lapply(group_rows, function(rows) {
      mean_and_sd(column[rows])
    }))
    matrix(cells, ncol = length(col_labels), byrow = TRUE,
           dimnames = list(var_of_int, col_labels))
  })

  table_final <- do.call(rbind, c(list(t(obs_row)), stat_rows))
  colnames(table_final) <- col_labels
  table_final
}

# Row labels for Table 1: the observation-count row first, then one label
# per entry of main_desc, in the same order.
rownamings <- c(
  "Obs.",
  "Grades score",
  "Friends",
  "White friends",
  "Nonwhite friends",
  "Same-race social status",
  "Other-race social status",
  "All-race social status"
)

# Columns of `data` summarised in the main descriptive table.
main_desc <- c(
  "grades_score",
  "friends_2",
  "white_friends_2",
  "nonwhite_friends_2",
  "ssi_same_negro_na_2",
  "ssi_other_race_negro_na_2",
  "ssi_all_race_2"
)

exp_desc_whole_race <- descrip_tab_whole_race(main_desc)
rownames(exp_desc_whole_race) <- rownamings

# Table 1
stargazer(
  exp_desc_whole_race,
  title = "Descriptive statistics",
  label = "quick_descriptive",
  column.labels = c("All", "White", "Brown", "Black"),
  column.separate = c(2, 2, 2, 2),
  out = "tables/quick_descriptive.tex"
)

# Descriptive table for the appendix
# Display labels, one per entry of vars_desc_appendix and in the same order.
var_names_appendix <- c(
  "Male",
  "Catholic",
  "Evangelic",
  "Age Grade Distortion",
  "Skin color",
  "Poverty index",
  "Neighborhood index",
  "Effort index",
  "Self-esteem index",
  "Parents support index"
)

# Columns of `data` summarised in the appendix table.
vars_desc_appendix <- c(
  "male",
  "catholic",
  "evangelic",
  "age_grade_distortion",
  "skin_color",
  "scores_poverty",
  "score_neighborhood_quality",
  "score_study",
  "score_self_esteem",
  "score_parents_support"
)

appendix_descriptive <- descrip_tab_whole_race(vars_desc_appendix)

# The first row returned by descrip_tab_whole_race() is the observation count.
rownames(appendix_descriptive) <- c("Obs", var_names_appendix)

# Table S1
stargazer(
  appendix_descriptive,
  title = "Complementary descriptive statistics",
  label = "desc_appendix",
  column.labels = c("All", "White", "Brown", "Black"),
  column.separate = c(2, 2, 2, 2),
  out = "tables/desc_appendix.tex"
)

# Table with answers to racism question:
# Reverse order of question:
# q9 codes 1, 2, 3, 4 become 4, 3, 2, 1; any other value (or NA) stays NA.
data$reverse_q9 <- NA
for (code in 1:4) {
  data$reverse_q9[which(data$q9 == code)] <- 5 - code
}

# Share of each answer, in percent rounded to two decimals, per question.
q1 <- round(100 * prop.table(table(data$reverse_q9)), digits = 2)
q2 <- round(100 * prop.table(table(data$q12)), digits = 2)
q3 <- round(100 * prop.table(table(data$q57)), digits = 2)
q4 <- round(100 * prop.table(table(data$q60)), digits = 2)

# Column labels (answer categories), also reused by the by-race table.
answers <- c("Strongly disagrees", "Disagrees", "Agrees", "Strongly agrees")

# Row labels (question wording). The second label is a multi-line string
# literal: its line break and leading spaces are part of the string.
questions <-
  c("I don’t care if I have white or black friends",
  "White people can generally get better jobs than black people can,
  because white people is more disciplined regarding companies rules",
  "In a democratic society, black people should behave like most white people do",
  "Black people are generally less concerned with work than others are")

# One row per question, one column per answer category.
table_racism <- rbind(q1, q2, q3, q4)
dimnames(table_racism) <- list(questions, answers)

# Table S4
stargazer(
  table_racism,
  title = "Racial ideology among Brazilian students",
  label = "tab:racism",
  digits = 2,
  out = "tables/tab_racism.tex"
)

# Table with answers to racism question, by race:
# Row indices of the two groups, split on the negro_na indicator.
nonwhites <- which(data$negro_na == 1)
whites <- which(data$negro_na == 0)

# Percentage (rounded to two decimals) of each answer among `rows`.
# The answers are tabulated against every category observed in the full
# column, so a category that nobody in the subgroup chose is reported as 0
# instead of being dropped. A dropped category would shorten the vector and
# rbind() below would recycle it, silently shifting values into the wrong
# answer column.
answer_shares <- function(all_answers, rows) {
  categories <- sort(unique(all_answers))
  counts <- table(factor(all_answers[rows], levels = categories))
  round(prop.table(counts) * 100, 2)
}

q1_nw <- answer_shares(data$reverse_q9, nonwhites)
q2_nw <- answer_shares(data$q12, nonwhites)
q3_nw <- answer_shares(data$q57, nonwhites)
q4_nw <- answer_shares(data$q60, nonwhites)

q1_wh <- answer_shares(data$reverse_q9, whites)
q2_wh <- answer_shares(data$q12, whites)
q3_wh <- answer_shares(data$q57, whites)
q4_wh <- answer_shares(data$q60, whites)

# Each question contributes three rows: an empty row that carries the
# question text as its row name, then the nonwhite and the white shares.
table_racism_race  <- rbind(rep("",4), q1_nw, q1_wh, rep("",4), q2_nw, q2_wh, 
                            rep("",4), q3_nw, q3_wh, rep("",4), q4_nw, q4_wh)

# Row labels. The second question is a multi-line string literal: its line
# break and leading spaces are part of the string.
questions_race <- 
  c("I don’t care if I have white or black friends", 
    "Nonwhite", "White",
    "White people can generally get better jobs than black people can,
    because white people is more disciplined regarding companies rules",
    "Nonwhite", "White",
    "In a democratic society, black people should behave like most white people do",
    "Nonwhite", "White",
    "Black people are generally less concerned with work than others are",
    "Nonwhite", "White")

rownames(table_racism_race) <- questions_race
colnames(table_racism_race) <- answers

# Table S5
# The output path now carries the .tex extension like every other table in
# this file; the base name is unchanged.
# NOTE(review): "racerace" looks like a typo for "race" -- kept because
# downstream documents may reference this name; confirm before renaming.
stargazer(table_racism_race,
          title = "Racial ideology among Brazilian students, by racial identification",
          label = "racial_ideology_race",
          out = "tables/racial_ideology_racerace.tex")

### Plot the distribution of skin color by racial identification ------
# Skin color of white, blacks and browns (Figure S2)
# Density of skin_color for rows with race_simple <= 3, one line per level
# of race_simple_factor.
graph_densi_brown <- data %>%
  subset(race_simple <= 3) %>%
  ggplot(aes(x = skin_color, color = race_simple_factor)) +
  geom_density(size = 2) +
  scale_colour_colorblind() +
  theme_classic(base_size = 32) +
  # labs() only sets labels. The stray `legend.position` argument that used
  # to be passed here had no effect on the legend and was removed; the
  # legend is positioned by theme() below.
  labs(title = "", x = "Skin color", y = "Probability", color = "") +
  theme(legend.position = c(.9, .9))

# Draw on the current device, then write the same plot to a PDF file.
print(graph_densi_brown)
pdf("figures/graph_densi_brown.pdf", width = 16, height = 8)
print(graph_densi_brown)
dev.off()

# Size of overlap among browns:
# Convert `data` to a data.table so the bracket syntax below (row filter
# first, := assignment second) can be used.
data <- setDT(data)

# Column percentages: distribution of skin_color within each race_simple
# group, for rows with race_simple <= 3.
100 * round(
  prop.table(
    table(data[race_simple <= 3, .(skin_color, race_simple)]),
    margin = 2
  ),
  digits = 3
)

# Classify rows with race_simple == 3 by skin_color band. The three bands do
# not overlap, so the order of the assignments does not matter; rows outside
# every band keep NA.
data[race_simple == 3 & skin_color >= 1 & skin_color <= 6,
     overlap_moreno := 1]
data[race_simple == 3 & skin_color >= 7 & skin_color <= 10,
     overlap_moreno := 0]
data[race_simple == 3 & skin_color >= 11 & skin_color <= 15,
     overlap_moreno := 2]

# Number of race_simple == 3 rows in each band.
table(data[race_simple == 3, overlap_moreno])

